1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.1 Loading the libraries

# Load analysis libraries.
library("FRESA.CAD")   # ILAA/IDeA decorrelation, FRESAScale, LASSO_1SE
library(readxl)        # read_excel() for the raw data
library(igraph)        # formula-association network plots
library(umap)          # UMAP embeddings
library(tsne)          # t-SNE embeddings
library(entropy)       # entropy estimates
# BUGFIX: stringr was never attached, but str_remove_all()/str_replace_all()
# are called later when stripping the "La_" prefixes from latent-feature names.
library(stringr)

op <- par(no.readonly = TRUE)  # snapshot of graphics state; restored via par(op)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.2 Material

Data Source https://quantdev.ssri.psu.edu/tutorials/intro-basic-exploratory-factor-analysis

“For this example, we use data from the web that are collected and distributed at https://openpsychometrics.org/_rawdata/. The data were obtained from 19,719 participants (rows) who provided answers to the Big Five Personality Test, constructed with items from the International Personality Item Pool. Data columns include gender, age, race, native language, country, and answers to the 50 likert rated statements (1-5; 0 if missed; 1 was labeled as “strongly disagree”, 2 was labeled as “disagree”, 3 was labeled as “neither agree nor disagree”, 4 was labeled as “agree” and 5 was labeled as “strongly agree”.) The original files can be obtained at http://openpsychometrics.org/_rawdata/BIG5.zip

1.3 The Data

# Load the Big Five data set and clean it:
#  - 0 encodes a missed answer; recode as NA and keep complete cases only
#  - drop implausible ages (>= 100 years)
BigData <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/BigData.xlsx"))

BigData[BigData==0] <- NA 
BigData <- BigData[complete.cases(BigData),]
BigData <- BigData[BigData$age<100,]

# Drop metadata columns by position.
# NOTE(review): indices are positional — confirm against the xlsx layout.
BigData <- BigData[,-c(1,3,5,6,7)]

# Recode gender as a 0/1 indicator (1 when the original code equals 1)
BigData$gender <- 1*(BigData$gender==1)


# log10-transform age, the modeling outcome
BigData$age <- log10(BigData$age)

1.3.1 Standardize the names for the reporting

# Standardized names and tuning parameters used throughout the report
studyName <- "PersonalityAge"  # report identifier
dataframe <- BigData           # working data set
outcome <- "age"               # name of the outcome column

thro <- 0.20     # ILAA decorrelation threshold
cexheat <- 0.35  # label scaling for the heat maps
loops <- 30      # number of cross-validation repetitions

1.4 Generating the report

1.4.1 Libraries

Some libraries

# Modeling / reporting libraries
library(psych)      # fa() exploratory factor analysis
library(whitening)
library("vioplot")  # violin plots
library("rpart")
library(multiColl)
library(car)        # vif()
library("pls")      # plsr()

# NOTE(review): absolute, machine-specific path — defines the helpers used
# below (e.g. CV_IDeA, tvals); consider a relative path for portability.
source("C:/Users/jtame/Documents/GitHub/LatentBiomarkers/RMD/RepeatedLinearCV.R")

1.4.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
19303 51

# Feature names: every column except the outcome
varlist <- colnames(dataframe)
varlist <- varlist[!(varlist %in% outcome)]

# Flag very wide data sets (more than 1500 features)
largeSet <- length(varlist) > 1500

1.4.3 Training and testing sets

# Seeded 75/25 split into training and testing rows
set.seed(1)
trainsamples <- sample(nrow(dataframe), 3 * nrow(dataframe) / 4)

trainingset <- dataframe[trainsamples, ]
testingset <- dataframe[-trainsamples, ]

# Per-variable summaries of the training set
pander::pander(t(summary(trainingset)))
             
age Min. :1.114 1st Qu.:1.255 Median :1.342 Mean :1.385 3rd Qu.:1.491 Max. :1.996
gender Min. :0.0000 1st Qu.:0.0000 Median :0.0000 Mean :0.3903 3rd Qu.:1.0000 Max. :1.0000
E1 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.644 3rd Qu.:4.000 Max. :5.000
E2 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.756 3rd Qu.:4.000 Max. :5.000
E3 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.424 3rd Qu.:4.000 Max. :5.000
E4 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.149 3rd Qu.:4.000 Max. :5.000
E5 Min. :1.000 1st Qu.:2.000 Median :4.000 Mean :3.436 3rd Qu.:5.000 Max. :5.000
E6 Min. :1.000 1st Qu.:1.000 Median :2.000 Mean :2.444 3rd Qu.:3.000 Max. :5.000
E7 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.873 3rd Qu.:4.000 Max. :5.000
E8 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.373 3rd Qu.:4.000 Max. :5.000
E9 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.102 3rd Qu.:4.000 Max. :5.000
E10 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.581 3rd Qu.:5.000 Max. :5.000
N1 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.259 3rd Qu.:4.000 Max. :5.000
N2 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.236 3rd Qu.:4.000 Max. :5.000
N3 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.844 3rd Qu.:5.000 Max. :5.000
N4 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.747 3rd Qu.:4.000 Max. :5.000
N5 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.948 3rd Qu.:4.000 Max. :5.000
N6 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.979 3rd Qu.:4.000 Max. :5.000
N7 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.155 3rd Qu.:4.000 Max. :5.000
N8 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.814 3rd Qu.:4.000 Max. :5.000
N9 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.134 3rd Qu.:4.000 Max. :5.000
N10 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.843 3rd Qu.:4.000 Max. :5.000
A1 Min. :1.000 1st Qu.:1.000 Median :2.000 Mean :2.309 3rd Qu.:3.000 Max. :5.000
A2 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.928 3rd Qu.:5.000 Max. :5.000
A3 Min. :1.000 1st Qu.:1.000 Median :2.000 Mean :2.171 3rd Qu.:3.000 Max. :5.000
A4 Min. :1.000 1st Qu.:4.000 Median :4.000 Mean :4.031 3rd Qu.:5.000 Max. :5.000
A5 Min. :1.000 1st Qu.:1.000 Median :2.000 Mean :2.164 3rd Qu.:3.000 Max. :5.000
A6 Min. :1.0 1st Qu.:3.0 Median :4.0 Mean :3.9 3rd Qu.:5.0 Max. :5.0
A7 Min. :1.000 1st Qu.:1.000 Median :2.000 Mean :2.157 3rd Qu.:3.000 Max. :5.000
A8 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.771 3rd Qu.:5.000 Max. :5.000
A9 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.944 3rd Qu.:5.000 Max. :5.000
A10 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.682 3rd Qu.:5.000 Max. :5.000
C1 Min. :1.000 1st Qu.:3.000 Median :3.000 Mean :3.321 3rd Qu.:4.000 Max. :5.000
C2 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.984 3rd Qu.:4.000 Max. :5.000
C3 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.986 3rd Qu.:5.000 Max. :5.000
C4 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.655 3rd Qu.:4.000 Max. :5.000
C5 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.701 3rd Qu.:4.000 Max. :5.000
C6 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :2.927 3rd Qu.:4.000 Max. :5.000
C7 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.653 3rd Qu.:5.000 Max. :5.000
C8 Min. :1.000 1st Qu.:2.000 Median :2.000 Mean :2.486 3rd Qu.:3.000 Max. :5.000
C9 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.226 3rd Qu.:4.000 Max. :5.000
C10 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.637 3rd Qu.:4.000 Max. :5.000
O1 Min. :1.0 1st Qu.:3.0 Median :4.0 Mean :3.7 3rd Qu.:5.0 Max. :5.0
O2 Min. :1.000 1st Qu.:1.000 Median :2.000 Mean :2.142 3rd Qu.:3.000 Max. :5.000
O3 Min. :1.000 1st Qu.:4.000 Median :4.000 Mean :4.129 3rd Qu.:5.000 Max. :5.000
O4 Min. :1.000 1st Qu.:1.000 Median :2.000 Mean :2.073 3rd Qu.:3.000 Max. :5.000
O5 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :3.874 3rd Qu.:5.000 Max. :5.000
O6 Min. :1.000 1st Qu.:1.000 Median :1.000 Mean :1.791 3rd Qu.:2.000 Max. :5.000
O7 Min. :1.000 1st Qu.:4.000 Median :4.000 Mean :4.076 3rd Qu.:5.000 Max. :5.000
O8 Min. :1.000 1st Qu.:2.000 Median :3.000 Mean :3.211 3rd Qu.:4.000 Max. :5.000
O9 Min. :1.000 1st Qu.:4.000 Median :4.000 Mean :4.143 3rd Qu.:5.000 Max. :5.000
O10 Min. :1.000 1st Qu.:3.000 Median :4.000 Mean :4.008 3rd Qu.:5.000 Max. :5.000

# Recompute the feature list from the training columns (outcome excluded)
varlist <- colnames(trainingset)
varlist <- varlist[!(varlist %in% outcome)]

1.4.4 Correlation Matrix of the Data

The heat map of the data

# Heat map of absolute pairwise Pearson correlations before decorrelation.
# NOTE(review): computed on the *testing* set although the section title says
# "the data" — confirm that is intended.
par(op)


  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  cormat <- cor(testingset[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0   # zero-variance columns yield NA correlations
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0   # ignore the unit diagonal when reporting the maximum
  pander::pander(max(abs(cormat)))

0.77

par(op)

1.4.5 Decorrelating using ILAA

ILAA bootstrapped training and testing sets

trainage_DE <- ILAA(trainingset,thr=thro,Outcome=outcome,verbose=TRUE)

fast | LM | N8 E1 E2 E3 E4 E5 E6 0.74 0.68 0.76 0.54 0.92 0.66

Included: 50 , Uni p: 0.003 , Base Size: 1 , Rcrit: 0.0228362

1 <R=0.525,thr=0.700>, Top: 1< 1 >Fa= 1,<|><>Tot Used: 2 , Added: 1 , Zero Std: 0 , Max Cor: 0.655

2 <R=0.514,thr=0.600>, Top: 5< 1 >Fa= 6,<|><>Tot Used: 12 , Added: 5 , Zero Std: 0 , Max Cor: 0.591

3 <R=0.470,thr=0.500>, Top: 8< 4 >Fa= 11,<|><>Tot Used: 29 , Added: 13 , Zero Std: 0 , Max Cor: 0.559

4 <R=0.373,thr=0.500>, Top: 1< 1 >Fa= 12,<|><>Tot Used: 31 , Added: 1 , Zero Std: 0 , Max Cor: 0.484

5 <R=0.366,thr=0.400>, Top: 4< 2 >Fa= 13,<|><>Tot Used: 36 , Added: 6 , Zero Std: 0 , Max Cor: 0.421

6 <R=0.339,thr=0.400>, Top: 2< 1 >Fa= 15,<|><>Tot Used: 36 , Added: 2 , Zero Std: 0 , Max Cor: 0.400

7 <R=0.329,thr=0.300>, Top: 13< 1 >Fa= 16,<|><>Tot Used: 48 , Added: 21 , Zero Std: 0 , Max Cor: 0.353

8 <R=0.277,thr=0.300>, Top: 6< 1 >Fa= 18,<|><>Tot Used: 48 , Added: 5 , Zero Std: 0 , Max Cor: 0.298

9 <R=0.257,thr=0.200>, Top: 13< 3 >Fa= 20,<|><>Tot Used: 49 , Added: 22 , Zero Std: 0 , Max Cor: 0.265

10 <R=0.226,thr=0.200>, Top: 7< 2 >Fa= 21,<|><>Tot Used: 49 , Added: 7 , Zero Std: 0 , Max Cor: 0.225

11 <R=0.221,thr=0.200>, Top: 2< 1 >Fa= 21,<|><>Tot Used: 49 , Added: 2 , Zero Std: 0 , Max Cor: 0.199

12 <R=0.199,thr=0.200>

[ 12 ], 0.1993877 Decor Dimension: 49 Nused: 49 . Cor to Base: 37 , ABase: 50 , Outcome Base: 0

#trainage_DE <- ILAA(trainingset,thr=thro,Outcome=outcome,verbose=TRUE,bootstrap=30)

# Apply the learned decorrelation transform to the held-out testing set
testage_DE <- predictDecorrelate(trainage_DE,testingset)

1.4.6 The Formulas

Generating the formulas


# Extract the latent-variable coefficient formulas discovered by ILAA
theLaFormulas <- getLatentCoefficients(trainage_DE)

# Human-readable versions of the formulas, one per latent feature
theCharformulas <- attr(theLaFormulas,"LatentCharFormulas")
pander::pander(as.matrix(theCharformulas))
La_E1 + E1 - (0.175)E5 - (0.411)E7
La_E2 + E2 + (0.403)E5 + (0.217)E7
La_E3 + E3 - (0.357)E5 - (0.299)E7
La_E4 - (0.297)E2 + E4 + (0.064)E5 + (0.139)E7 - (0.175)E8 - (0.190)E10
La_E6 - (0.409)E2 + (0.237)E5 + E6
La_E7 - (0.704)E5 + E7
La_E8 - (0.230)E2 + (0.258)E5 - (0.050)E7 + E8
La_E9 - (0.250)E5 + (0.575)E8 + E9
La_E10 + (0.353)E5 + (0.293)E7 + E10
La_N1 + N1 - (0.423)N6 - (0.199)N8
La_N2 + (0.318)N1 + N2 + (0.137)N6 + (0.085)N8
La_N3 - (0.488)N1 + N3
La_N4 + N4 + (0.048)N8 + (0.346)N10
La_N5 + N5 - (0.371)N6 - (0.176)N8
La_N6 + N6 - (0.525)N8
La_N7 + N7 - (0.738)N8
La_N9 - (0.185)N1 - (0.520)N6 - (0.161)N8 + N9
La_N10 + (0.172)E5 - (0.222)N6 - (0.409)N8 + N10
La_A1 - (0.073)E5 + A1 + (0.286)A4 - (0.307)A7
La_A2 - (0.144)E5 + A2 + (0.553)A7
La_A3 - (0.179)N8 + A3 + (0.363)A4
La_A5 + (0.276)A4 + A5 - (0.573)A7
La_A6 - (0.361)A4 + A6 - (0.269)A9
La_A7 + (0.238)E5 + (0.480)A4 + A7
La_A8 - (0.311)A4 + A8 - (0.226)A9
La_A9 - (0.682)A4 + A9
La_A10 - (0.321)E5 - (0.251)A4 + A10
La_C1 + C1 + (0.210)C2
La_C3 - (0.274)C1 + C3
La_C4 - (0.237)N8 - (0.200)C2 + C4 - (0.313)C6
La_C5 - (0.289)C1 + (0.196)C2 + C5 - (0.315)C9
La_C6 + (0.244)C1 - (0.472)C2 + C6
La_C7 - (0.130)C1 + (0.116)C2 + C7 - (0.298)C9
La_C8 - (0.187)N8 + (0.336)C1 + C8
La_C9 - (0.483)C1 + C9
La_C10 - (0.231)C1 - (0.271)C3 + C10
La_O2 + (0.329)O1 + O2
La_O3 - (0.153)O1 + O3 - (0.260)O5
La_O4 - (0.520)O2 + O4
La_O5 - (0.260)O1 + O5
La_O6 + (0.564)O3 + O6
La_O7 - (0.143)O1 + (0.183)O2 - (0.308)O5 + O7
La_O8 - (0.710)O1 + O8
La_O10 - (0.243)O3 - (0.651)O5 + O10

1.4.7 Formulas Network

Displaying the features associations

par(op)

  # Adjacency from the UPLTM transform: which observed features enter each
  # latent formula, edge-weighted by observed pairwise correlation.
  transform <- attr(trainage_DE,"UPLTM") != 0
  colnames(transform) <- str_remove_all(colnames(transform),"La_")
  transform <- abs(transform*cor(trainingset[,rownames(transform)])) # The weights are proportional to the observed correlation
  
  
  VertexSize <- attr(trainage_DE,"fscore") # The size depends on the variable independence relevance (fscore)
  names(VertexSize) <- str_remove_all(names(VertexSize),"La_")
  VertexSize <- 0.5+9.5*(VertexSize-min(VertexSize))/(max(VertexSize)-min(VertexSize)) # Normalization
  VertexSize <- VertexSize[colnames(transform)]
  gr <- graph_from_adjacency_matrix(transform,mode = "directed",diag = FALSE,weighted=TRUE)
  gr$layout <- layout_with_fr
  
  # Community detection by exact modularity optimization.
  # NOTE(review): cluster_optimal() is exponential-time — acceptable at ~50
  # vertices; the commented walktrap line is the scalable alternative.
  fc <- cluster_optimal(gr)
#          fc <- cluster_walktrap (gr,steps=50)

  plot(fc, gr,
       edge.width = 2*E(gr)$weight,
       vertex.size=VertexSize,
       edge.arrow.size=0.5,
       edge.arrow.width=0.75,
       vertex.label.color="purple",
#       vertex.label.cex=0.85,
#       vertex.label.dist=1.2,
       vertex.label.cex=(0.70 + 0.025*VertexSize),
       vertex.label.dist=(0.5 + 0.05*VertexSize),

       
       main="Feature Association")


par(op)

      # Tabulate, per feature: latent formula, network cluster membership,
      # residual variance after decorrelation, and independence score.
      varratios <- attr(trainage_DE,"VarRatio")
      names(varratios) <- str_remove_all(names(varratios),"La_")
      fscores <- attr(trainage_DE,"fscore") 
      names(fscores) <- str_remove_all(names(fscores),"La_")
      clustable <- as.data.frame(cbind(Variable=fc$names,
                                       Formula=as.character(theCharformulas[paste("La_",fc$names,sep="")]),
                                       Cluster=fc$membership,
                                       ResidualVariance=round(varratios[fc$names],3),
                                       Fscore=round(fscores[fc$names],3)
                                       )
                                 )
      rownames(clustable) <- str_replace_all(rownames(clustable),"__","_")
      clustable$Variable <- NULL
      # cbind() produced character columns; restore numeric/integer types
      clustable$Cluster <- as.integer(clustable$Cluster)
      clustable$ResidualVariance <- as.numeric(clustable$ResidualVariance)
      clustable$Fscore <- as.numeric(clustable$Fscore)
      # Stacked stable sorts: final order is Cluster, then ResidualVariance
      # descending, then Fscore descending
      clustable <- clustable[order(-clustable$Fscore),]
      clustable <- clustable[order(-clustable$ResidualVariance),]
      clustable <- clustable[order(clustable$Cluster),]

pander::pander(as.matrix(clustable))
  Formula Cluster ResidualVariance Fscore
E5 NA 1 1.000 13
E8 - (0.230)E2 + (0.258)E5 - (0.050)E7 + E8 1 0.835 0
E9 - (0.250)E5 + (0.575)E8 + E9 1 0.676 -2
E2 + E2 + (0.403)E5 + (0.217)E7 1 0.671 1
E10 + (0.353)E5 + (0.293)E7 + E10 1 0.637 -1
E6 - (0.409)E2 + (0.237)E5 + E6 1 0.634 -2
E1 + E1 - (0.175)E5 - (0.411)E7 1 0.630 -2
E7 - (0.704)E5 + E7 1 0.599 4
E3 + E3 - (0.357)E5 - (0.299)E7 1 0.579 -2
E4 - (0.297)E2 + E4 + (0.064)E5 + (0.139)E7 - (0.175)E8 - (0.190)E10 1 0.557 -5
N8 NA 2 1.000 11
N4 + N4 + (0.048)N8 + (0.346)N10 2 0.837 -2
N5 + N5 - (0.371)N6 - (0.176)N8 2 0.740 -2
N2 + (0.318)N1 + N2 + (0.137)N6 + (0.085)N8 2 0.738 -3
N6 + N6 - (0.525)N8 2 0.712 4
N3 - (0.488)N1 + N3 2 0.688 -1
N1 + N1 - (0.423)N6 - (0.199)N8 2 0.683 1
N10 + (0.172)E5 - (0.222)N6 - (0.409)N8 + N10 2 0.645 -2
N9 - (0.185)N1 - (0.520)N6 - (0.161)N8 + N9 2 0.566 -3
N7 + N7 - (0.738)N8 2 0.411 -1
A4 NA 3 1.000 8
A3 - (0.179)N8 + A3 + (0.363)A4 3 0.864 -2
A1 - (0.073)E5 + A1 + (0.286)A4 - (0.307)A7 3 0.843 -2
A10 - (0.321)E5 - (0.251)A4 + A10 3 0.784 -2
A8 - (0.311)A4 + A8 - (0.226)A9 3 0.750 -2
A7 + (0.238)E5 + (0.480)A4 + A7 3 0.729 1
A6 - (0.361)A4 + A6 - (0.269)A9 3 0.709 -2
A2 - (0.144)E5 + A2 + (0.553)A7 3 0.632 -2
A5 + (0.276)A4 + A5 - (0.573)A7 3 0.614 -2
A9 - (0.682)A4 + A9 3 0.571 1
C2 NA 4 1.000 5
C1 + C1 + (0.210)C2 4 0.931 6
C3 - (0.274)C1 + C3 4 0.909 0
C8 - (0.187)N8 + (0.336)C1 + C8 4 0.844 -2
C10 - (0.231)C1 - (0.271)C3 + C10 4 0.825 -2
C9 - (0.483)C1 + C9 4 0.819 1
C7 - (0.130)C1 + (0.116)C2 + C7 - (0.298)C9 4 0.807 -3
C5 - (0.289)C1 + (0.196)C2 + C5 - (0.315)C9 4 0.724 -3
C6 + (0.244)C1 - (0.472)C2 + C6 4 0.702 -1
C4 - (0.237)N8 - (0.200)C2 + C4 - (0.313)C6 4 0.684 -3
O1 NA 5 1.000 5
O5 - (0.260)O1 + O5 5 0.905 2
O2 + (0.329)O1 + O2 5 0.895 1
O3 - (0.153)O1 + O3 - (0.260)O5 5 0.886 0
O7 - (0.143)O1 + (0.183)O2 - (0.308)O5 + O7 5 0.748 -3
O6 + (0.564)O3 + O6 5 0.720 -1
O4 - (0.520)O2 + O4 5 0.715 -1
O8 - (0.710)O1 + O8 5 0.605 -1
O10 - (0.243)O3 - (0.651)O5 + O10 5 0.552 -2

1.4.8 Correlation Matrix of the Data

The heat map of the ILAA transformed data

par(op)

# Feature names of the decorrelated (latent) training frame
varlistDe <- colnames(trainage_DE)
varlistDe <- varlistDe[varlistDe != outcome]

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)

# Training    
  # Correlation heat map after ILAA; the maximum off-diagonal value
  # should be close to the decorrelation threshold `thro`
  cormat <- cor(trainage_DE[,varlistDe],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Training: After ILAA Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0   # ignore the unit diagonal when reporting the maximum
  pander::pander(max(abs(cormat)))

0.199


  par(op)

# Testing

  # Same heat map on the transformed testing set (out-of-sample check of
  # how well the decorrelation generalizes)
  cormat <- cor(testage_DE[,varlistDe],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Testing: After ILAA Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")

  diag(cormat) <- 0   # ignore the unit diagonal when reporting the maximum
  pander::pander(max(abs(cormat)))

0.213


  par(op)

  

1.4.9 Modeling

1.4.9.1 Modeling outcome using raw set

# LASSO (1-SE rule) on the raw training set; report the nonzero coefficients
outcomeModel <- LASSO_1SE(formula(paste(outcome,"~.")),trainingset);
predOutcome <- predict(outcomeModel,testingset)
pander::pander(as.matrix(outcomeModel$coef))
(Intercept) 1.483869
gender 0.020497
E1 -0.000793
E2 0.002568
E5 0.004998
E6 -0.003737
E9 -0.008185
E10 -0.012682
N1 -0.005191
N2 -0.014562
N3 -0.006960
N5 0.002689
N6 0.000923
N7 -0.012670
N10 0.003161
A1 -0.007726
A2 -0.000213
A3 -0.012584
A4 0.001779
A5 0.005324
A6 0.008162
A7 0.002342
A8 -0.001047
A10 0.006571
C1 -0.003545
C2 0.005909
C3 -0.007768
C4 -0.009452
C5 0.004839
C6 -0.004175
C7 0.003688
C8 -0.019641
C10 0.005771
O1 0.011377
O3 -0.001174
O4 0.003396
O5 0.010032
O7 0.001588
O8 -0.000314
O9 0.006821
O10 -0.005317

1.4.9.2 Modeling outcome using decorrelated set

# LASSO (1-SE rule) on the ILAA-decorrelated training set
outcomeModel_DE <- LASSO_1SE(formula(paste(outcome,"~.")),trainage_DE);
predOutcome_DE <- predict(outcomeModel_DE,testage_DE)

pander::pander(as.matrix(outcomeModel_DE$coef),caption="ILAA Coef")
ILAA Coef
(Intercept) 1.505160
gender 0.019496
La_E1 -0.001356
La_E2 0.002897
La_E4 0.000102
E5 0.008812
La_E6 -0.004411
La_E8 0.001912
La_E9 -0.007779
La_E10 -0.011844
La_N1 -0.004476
La_N2 -0.014618
La_N3 -0.005560
La_N5 0.001879
La_N6 0.002153
La_N7 -0.015216
N8 -0.011527
La_N9 -0.001242
La_N10 0.002314
La_A1 -0.008082
La_A3 -0.011854
A4 0.010641
La_A5 0.005158
La_A6 0.007126
La_A7 0.003922
La_A8 -0.001010
La_A10 0.006163
La_C1 0.004373
La_C3 -0.004744
La_C4 -0.009627
La_C5 0.005318
La_C6 -0.008343
La_C7 0.003877
La_C8 -0.020095
La_C9 0.000825
La_C10 0.007372
O1 0.011827
La_O2 0.000396
La_O3 -0.003986
La_O4 0.002622
La_O5 0.005731
La_O7 0.001359
La_O8 -0.002888
O9 0.006708
La_O10 -0.007341
# Map the latent-space coefficients back to the observed-feature basis
obsCoef <- getObservedCoef(trainage_DE,outcomeModel_DE)
pander::pander(as.matrix(obsCoef$coef),caption="ILAA Modeling")
ILAA Modeling
(Intercept) 1.505160
gender 0.019496
O9 0.006708
E1 -0.001356
E2 0.004232
E4 0.000102
E5 0.007375
E6 -0.004411
E7 -0.002361
E8 -0.002579
E9 -0.007779
E10 -0.011864
N1 -0.006191
N2 -0.014618
N3 -0.005560
N5 0.001879
N6 0.001472
N7 -0.015216
N8 0.005307
N9 -0.001242
N10 0.002314
A1 -0.008082
A3 -0.011854
A4 0.003521
A5 0.005158
A6 0.007126
A7 0.003446
A8 -0.001010
A9 -0.001687
A10 0.006163
C1 -0.007245
C2 0.008276
C3 -0.006744
C4 -0.009627
C5 0.005318
C6 -0.005334
C7 0.003877
C8 -0.020095
C9 -0.002005
C10 0.007372
O1 0.012934
O2 -0.000719
O3 -0.002202
O4 0.002622
O5 0.011133
O7 0.001359
O8 -0.002888
O10 -0.007341


pander::pander(cor.test(predOutcome,testingset[,outcome]),caption="Raw Model")
Raw Model
Test statistic df P value Alternative hypothesis cor
33.7 4824 2.05e-223 * * * two.sided 0.436
pander::pander(cor.test(predOutcome_DE,testage_DE[,outcome]),caption="ILAA-based Model")
ILAA-based Model
Test statistic df P value Alternative hypothesis cor
34 4824 5.33e-227 * * * two.sided 0.439

1.4.9.3 Univariate t-test


# Univariate t-values (tvals is defined in RepeatedLinearCV.R) of each
# LASSO-selected feature against the outcome, on the testing set
rawunittvalues <- apply(as.matrix(testingset[,names(outcomeModel$coef)[-1]]),2,tvals,testingset[,outcome])
names(rawunittvalues) <- names(outcomeModel$coef)[-1]

# Same for the decorrelated features.
# NOTE(review): the outcome is taken from testingset — assumes ILAA left the
# outcome column unchanged in testage_DE; confirm.
deunittvalues <- apply(testage_DE[,names(outcomeModel_DE$coef)[-1]],2,tvals,testingset[,outcome])

1.4.9.4 Comparing summaries


# Fit a plain linear model on the LASSO-selected raw features (testing set)
# and, when the model is large, keep only strongly significant terms.
psig <- 0.1/(ncol(testingset)-1)   # Bonferroni-style significance cutoff
lmod <- lm(paste(outcome,"~."),testingset[,c(outcome,names(outcomeModel$coef)[-1])])
# BUGFIX: the original `try(vifx <- vif(...))` left vifx undefined when vif()
# failed, so downstream inherits() checks could not detect the failure.
# Capture the try() result in vifx instead; vif() is applied to the already
# fitted model rather than refitting an identical one.
vifx <- try(vif(lmod))
sm <- summary(lmod)
if (length(lmod$coef)>10)
{
  sm$coefficients[1,4] <- 1.0   # force the intercept out of the selection (lm re-adds it)
  gcoef <- lmod$coef[sm$coefficients[,4]<psig]
  lmod <- lm(paste(outcome,"~."),testingset[,c(outcome,names(gcoef))])
  vifx <- try(vif(lmod))
}



# Summarize the reduced raw model: coefficients sorted by |t|, augmented with
# univariate t-values and (when available) variance inflation factors.
sm <- summary(lmod)
smcoef <- as.data.frame(sm$coefficients)
smcoef <- smcoef[order(-abs(smcoef[,3])),]
smcoef$Uni_t_values <- rawunittvalues[rownames(smcoef)]
# BUGFIX: the original tested inherits(vif,"try-error") — `vif` is the car
# function, so the condition was always FALSE and the assignment crashed
# whenever the vif() call had failed. Test the captured result instead, and
# guard against it never having been assigned.
if (exists("vifx") && !inherits(vifx,"try-error")) smcoef$vif <- vifx[rownames(smcoef)]
smcoef <- smcoef[!is.na(smcoef$Uni_t_values),]   # drops the intercept row
if (nrow(smcoef)>10) smcoef <- smcoef[smcoef[,4]<psig,]

pander::pander(smcoef)
  Estimate Std. Error t value Pr(>|t|) Uni_t_values vif
O1 0.02304 0.00265 8.69 4.87e-18 9.467 1.84
C8 -0.01862 0.00225 -8.28 1.56e-16 -18.654 1.37
A3 -0.01653 0.00202 -8.17 3.83e-16 -14.764 1.25
N2 -0.01588 0.00216 -7.34 2.58e-13 -1.496 1.36
N7 -0.01449 0.00208 -6.95 4.00e-12 -14.874 1.56
E10 -0.01386 0.00210 -6.60 4.49e-11 -11.288 1.54
A1 -0.01121 0.00174 -6.44 1.30e-10 -10.467 1.21
C4 -0.01272 0.00220 -5.78 7.95e-09 -16.127 1.57
A7 0.01113 0.00226 4.92 8.75e-07 -4.492 1.40
gender 0.02249 0.00471 4.77 1.87e-06 2.235 1.11
C5 0.00950 0.00204 4.66 3.23e-06 12.496 1.36
O8 -0.01088 0.00235 -4.63 3.77e-06 -0.140 1.84
E5 0.01025 0.00222 4.63 3.81e-06 9.291 1.66
C10 0.01069 0.00237 4.51 6.50e-06 10.096 1.23
E9 -0.00811 0.00181 -4.48 7.74e-06 0.599 1.35
N6 0.00945 0.00225 4.20 2.76e-05 -6.137 1.87
N3 -0.00974 0.00249 -3.91 9.26e-05 -8.653 1.67
N5 0.00765 0.00202 3.78 1.56e-04 -4.218 1.38
N1 -0.00842 0.00225 -3.74 1.88e-04 -8.452 1.83
O9 0.00871 0.00234 3.73 1.93e-04 3.492 1.13
C3 -0.00860 0.00238 -3.61 3.06e-04 2.279 1.23
C2 0.00633 0.00183 3.46 5.40e-04 -5.052 1.31
pander::pander(t(c(R2=sm$r.squared,adj_R2=sm$adj.r.squared)))
R2 adj_R2
0.188 0.185
pander::pander(c(numvar=nrow(smcoef)))
numvar
22


# Same model reduction for the ILAA features on the decorrelated testing set.
lmod_DE <- lm(paste(outcome,"~."),testage_DE[,c(outcome,names(outcomeModel_DE$coef)[-1])])
# BUGFIX: capture the try() result so a vif() failure is detectable via
# inherits(vifx,"try-error"); vif() reuses the fitted model instead of
# refitting an identical one.
vifx <- try(vif(lmod_DE))

sm <- summary(lmod_DE)
if (length(lmod_DE$coef)>10)
{
  sm$coefficients[1,4] <- 1.0   # force the intercept out of the selection (lm re-adds it)
  gcoef <- lmod_DE$coef[sm$coefficients[,4]<psig]
  lmod_DE <- lm(paste(outcome,"~."),testage_DE[,c(outcome,names(gcoef))])
  vifx <- try(vif(lmod_DE))
}

# Coefficient table for the ILAA model, annotated with each latent feature's
# formula and residual variance ratio.
sm <- summary(lmod_DE)
lacoef <- as.data.frame(sm$coefficients)
lacoef <- lacoef[order(-abs(lacoef[,3])),]
lacoef$Uni_t_values <- deunittvalues[rownames(lacoef)]
# Guard: with the original `try(vifx <- ...)` pattern vifx may be undefined
# (or stale from the raw section) when vif() failed — check existence too.
if (exists("vifx") && !inherits(vifx,"try-error")) lacoef$vif <- vifx[rownames(lacoef)]
lacoef <- lacoef[!is.na(lacoef$Uni_t_values),]   # drops the intercept row
if (nrow(lacoef)>10) lacoef <- lacoef[lacoef[,4]<psig,]

lacoef$formula <- theCharformulas[rownames(lacoef)]   # NA for untransformed features
lacoef$VarRatio <- varratios[str_remove_all(rownames(lacoef),"La_")]

pander::pander(lacoef)
  Estimate Std. Error t value Pr(>|t|) Uni_t_values vif formula VarRatio
La_C8 -0.01871 0.00221 -8.45 3.91e-17 -14.670 1.14 - (0.187)N8 + (0.336)C1 + C8 0.844
La_N7 -0.02119 0.00268 -7.92 3.03e-15 -10.014 1.05 + N7 - (0.738)N8 0.411
La_A3 -0.01624 0.00206 -7.89 3.80e-15 -11.035 1.12 - (0.179)N8 + A3 + (0.363)A4 0.864
La_N2 -0.01590 0.00216 -7.35 2.27e-13 -7.321 1.07 + (0.318)N1 + N2 + (0.137)N6 + (0.085)N8 0.738
O1 0.01428 0.00206 6.95 4.24e-12 9.467 1.11 NA 1.000
E5 0.01163 0.00182 6.41 1.64e-10 9.291 1.12 NA 1.000
N8 -0.01084 0.00171 -6.32 2.84e-10 -10.878 1.14 NA 1.000
La_E10 -0.01330 0.00218 -6.11 1.10e-09 -7.452 1.10 + (0.353)E5 + (0.293)E7 + E10 0.637
La_C4 -0.01335 0.00221 -6.03 1.72e-09 -9.541 1.11 - (0.237)N8 - (0.200)C2 + C4 - (0.313)C6 0.684
La_A1 -0.01009 0.00178 -5.68 1.41e-08 -9.179 1.06 - (0.073)E5 + A1 + (0.286)A4 - (0.307)A7 0.843
La_C6 -0.00992 0.00192 -5.18 2.31e-07 -8.757 1.10 + (0.244)C1 - (0.472)C2 + C6 0.702
La_O8 -0.01190 0.00235 -5.07 4.11e-07 -7.975 1.07 - (0.710)O1 + O8 0.605
La_C10 0.01179 0.00239 4.94 8.12e-07 8.382 1.05 - (0.231)C1 - (0.271)C3 + C10 0.825
La_A7 0.01101 0.00230 4.79 1.69e-06 1.118 1.08 + (0.238)E5 + (0.480)A4 + A7 0.729
gender 0.02087 0.00469 4.45 8.93e-06 2.235 1.10 NA 1.000
La_N3 -0.01068 0.00246 -4.33 1.49e-05 -4.593 1.16 - (0.488)N1 + N3 0.688
A4 0.00950 0.00223 4.26 2.07e-05 6.858 1.15 NA 1.000
La_N6 0.00867 0.00208 4.17 3.13e-05 -0.403 1.11 + N6 - (0.525)N8 0.712
La_C5 0.00862 0.00214 4.03 5.73e-05 8.181 1.09 - (0.289)C1 + (0.196)C2 + C5 - (0.315)C9 0.724
La_E8 0.00742 0.00191 3.88 1.08e-04 3.424 1.09 - (0.230)E2 + (0.258)E5 - (0.050)E7 + E8 0.835
La_N1 -0.00707 0.00209 -3.38 7.33e-04 -4.312 1.09 + N1 - (0.423)N6 - (0.199)N8 0.683
O9 0.00758 0.00234 3.24 1.19e-03 3.492 1.13 NA 1.000
pander::pander(t(c(R2=sm$r.squared,adj_R2=sm$adj.r.squared)))
R2 adj_R2
0.188 0.184
pander::pander(c(numvar=nrow(lacoef)))
numvar
22


# Scatter of univariate t-values vs multivariate regression t-values with a
# least-squares trend line; a slope near 1 indicates the multivariate model
# preserves the univariate effect ranking.
xvals <-c(min(c(deunittvalues,rawunittvalues))-3,max(c(deunittvalues,rawunittvalues))+3)

par(mfrow=c(1,2),cex=0.5)

plot(smcoef[,c(3,5)],
     main="Raw: Univariate t-values vs regression t-values",
     xlim=xvals,
     ylim=xvals
     )

lmtvals <- lm(smcoef[,5]~smcoef[,3])
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))


# BUGFIX: the original plotted lacoef[-1,c(3,5)], silently dropping the
# largest-|t| feature from the ILAA panel while the raw panel and the fitted
# line below use every row; plot all rows for consistency.
plot(lacoef[,c(3,5)],
     main="ILAA: Univariate t-values vs regression t-values",
     xlim=xvals,
     ylim=xvals
     )

lmtvals <- lm(lacoef[,5]~lacoef[,3])
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))


#pander::pander(summary(lmtvals))


pander::pander(cor.test(smcoef[,3],smcoef[,5]))
Pearson’s product-moment correlation: smcoef[, 3] and smcoef[, 5]
Test statistic df P value Alternative hypothesis cor
4.68 20 0.000143 * * * two.sided 0.723

pander::pander(cor.test(lacoef[,3],lacoef[,5]))
Pearson’s product-moment correlation: lacoef[, 3] and lacoef[, 5]
Test statistic df P value Alternative hypothesis cor
14.3 20 5.95e-12 * * * two.sided 0.954

par(op)

1.4.9.5 Plotting predictions

# Side-by-side: train vs test predictions for each model, then raw vs ILAA
par(mfrow=c(1,3),cex=0.5)
plot(lmod$fitted.values,predOutcome,main="Raw: lm train predict vs. test predict",xlab="Train",ylab="Test")
plot(lmod_DE$fitted.values,predOutcome_DE,main="ILAA: lm train predict vs. test predict",xlab="Train",ylab="Test")

plot(predOutcome,predOutcome_DE,xlab="Raw Predicted",ylab="ILAA Predicted",main="Raw vs. ILAA")


par(op)

1.4.10 CV

1.4.10.1 test Correlations

par(op)
# Repeated cross-validation comparing raw vs decorrelated modeling
# (CV_IDeA is defined in RepeatedLinearCV.R)
corresults <- CV_IDeA(dataframe,outcome,loops=loops)

………. ………. ……….


# Shared axis range for the CV t-value scatter plots below
mintvals <- min(c(min(corresults$rawtValues),min(corresults$detValues)))
maxvals <- max(c(max(corresults$rawtValues),max(corresults$detValues)))
xvals <- c(mintvals,maxvals)

# Distribution of test-set correlations across CV repetitions
vioplot(list(raw=corresults$testRawCorrelations,ILAA=corresults$testDeCorrelations),
        ylab="Pearson Correlation",
        main="Test Correlations")


# Paired comparison of the two correlation distributions
pander::pander(t.test(corresults$testDeCorrelations,corresults$testRawCorrelations,paired=TRUE))
Paired t-test: corresults$testDeCorrelations and corresults$testRawCorrelations
Test statistic df P value Alternative hypothesis mean difference
4.92 29 3.16e-05 * * * two.sided 0.00213

# VIF distributions across CV repetitions; y-axis capped at 20 for readability
sylim <- c(1,min(c(20,max(corresults$VIFRaw))))
vioplot(list(raw=corresults$VIFRaw,ILAA=corresults$VIFDe),
        ylab="VIF",
        ylim=sylim,
        main="Test VIF")



pander::pander(summary(cbind(raw=corresults$VIFRaw,ILAA=corresults$VIFDe)))
raw ILAA
Min. :2.032 Min. :1.222
1st Qu.:2.099 1st Qu.:1.270
Median :2.177 Median :1.290
Mean :2.439 Mean :1.283
3rd Qu.:2.910 3rd Qu.:1.304
Max. :3.048 Max. :1.320
summary(corresults$VIFRaw)

Min. 1st Qu. Median Mean 3rd Qu. Max. 2.032 2.099 2.177 2.439 2.910 3.048

1.4.11 The t-values


# Univariate vs model t-values pooled over all CV repetitions (raw features)
par(op)
par(mfrow=c(1,2),cex=0.5)
plot(corresults$rawtValues,
     main="Raw: Univariate t-values vs Model t-values",
     xlab="Univariate",
     ylab="Model",
     xlim=xvals,
     ylim=xvals)

# Trend line of model t-values on univariate t-values
lmtvals <- lm(Model~.,corresults$rawtValues)
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))

pander::pander(summary(lmtvals))
  Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0717 0.0928 0.773 4.40e-01
Uni 0.3111 0.0110 28.213 9.91e-136
Fitting linear model: Model ~ .
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1244 3.21 0.391 0.39

# Same scatter for the ILAA-decorrelated features; a larger slope/R-squared
# indicates better agreement between univariate and multivariate estimates
plot(corresults$detValues,
      main="ILAA: Univariate t-values vs Model t-values",
     xlab="Univariate",
     ylab="Model",
     xlim=xvals,
     ylim=xvals)

lmtvals <- lm(Model~.,corresults$detValues)
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))


pander::pander(summary(lmtvals))
  Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.339 0.05405 6.27 4.81e-10
Uni 0.620 0.00846 73.24 0.00e+00
Fitting linear model: Model ~ .
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1272 1.89 0.809 0.808

1.5 PCA, EFA, PLS, ERT


# Select features with at least 5 distinct values (excludes the 0/1 gender
# indicator and the outcome).
# NOTE(review): apply() coerces the data frame to a matrix — safe here
# because all columns are numeric.
toPCA <- sapply(apply(dataframe,2,unique),length) >= 5 & colnames(dataframe) != outcome

pc <- prcomp(dataframe[,toPCA],center = TRUE,scale. = TRUE,tol=0.01)   #principal components

if (ncol(dataframe)<20)
{
pander::pander(as.data.frame(pc$rotation),caption="PCA")
}

# Log-compressed |rotation| so small and large loadings share one color scale
rotstd <- log10(abs(100*pc$rotation)+1.0)
  gplots::heatmap.2(rotstd,
                    trace = "none",
                    dendrogram="none",
                    breaks=c(0,0.5,1,2,3),
#                    scale="row",
                    mar = c(5,5),
                    col=rainbow(4),
                    main = "PCA Rotation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    Rowv=FALSE,
                    Colv=FALSE,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="log(|100Rot|+1)",
                    xlab="Output Feature", ylab="Input Feature")



# Exploratory factor analysis with as many factors as retained PCA components
efa <- try(fa(dataframe[,toPCA],ncol(pc$rotation),rotate="varimax",warnings=FALSE))  # EFA analysis

if (!inherits(efa,"try-error"))
{
  if (ncol(dataframe)<20)
  {
   pander::pander(as.data.frame(efa$weights),caption="EFA")
  }
  # Log-compressed |weights| heat map, same scale convention as the PCA panel
  rotstd <- log10(abs(100*efa$weights)+1.0)
    gplots::heatmap.2(rotstd,
                      trace = "none",
                      dendrogram="none",
                      breaks=c(0,0.5,1,2,3),
  #                    scale="row",
                      mar = c(5,5),
                      col=rainbow(4),
                      main = "EFA weights",
                      cexRow = cexheat,
                      cexCol = cexheat,
                      Rowv=FALSE,
                      Colv=FALSE,
                     srtCol=45,
                     srtRow=45,
                      key.title=NA,
                      key.xlab="log(|100W|+1)",
                      xlab="Output Feature", ylab="Input Feature")
}


  
  
# Partial least squares regression of the outcome on all (scaled) features
plm <- plsr(formula=formula(paste(outcome,"~.")),data=dataframe,scale =TRUE)
if (ncol(dataframe)<20)
{
  # Strip the "loadings" class so pander renders a plain numeric matrix
  lds <- plm$loadings
  lds2 <- matrix(as.numeric(lds),nrow=nrow(lds),ncol=ncol(lds))
  colnames(lds2) <- colnames(lds)
  rownames(lds2) <- rownames(lds)
  pander::pander(lds2,caption="PLS")
}

# Log-compressed |loadings| heat map, same scale convention as PCA/EFA
loadadings <- log10(abs(100*plm$loadings) + 1.0)
  gplots::heatmap.2(loadadings,
                    breaks=c(0,0.5,1,2,3),
                    trace = "none",
                    dendrogram="none",
#                    scale="row",
                    mar = c(5,5),
                    col=rainbow(4),
                    main = "PLS Loadings",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    Rowv=FALSE,
                    Colv=FALSE,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="log(|100Beta|+1)",
                    xlab="Output Feature", ylab="Input Feature")



# ILAA (FRESA.CAD) decorrelation of the features, preserving the outcome.
# The returned frame carries the transformation matrix as the "UPLTM"
# attribute (unit-preserving lower-triangular matrix) — TODO confirm acronym.
ERTmod <- ILAA(dataframe, Outcome = outcome, thr = thro)

# Log-compress the transform magnitudes for the fixed-break heatmap.
ERT <- log10(abs(100*attr(ERTmod,"UPLTM")) + 1)
gplots::heatmap.2(ERT,
                  trace = "none",
                  breaks = c(0,0.5,1,2,3),
                  margins = c(5,5),  # spelled out: 'mar' relied on partial matching
                  col = rainbow(4),
                  main = "ERT Rotation",
                  cexRow = cexheat,
                  cexCol = cexheat,
                  dendrogram = "none",
                  Rowv = FALSE,      # keep original feature order
                  Colv = FALSE,
                  srtCol = 45,
                  srtRow = 45,
                  key.title = NA,
                  key.xlab = "log(|100Beta|+1)",
                  xlab = "Output Feature", ylab = "Input Feature")

# Print the raw transform only for small feature sets.
if (ncol(dataframe)<20)
{
  pander::pander(attr(ERTmod,"UPLTM"),caption="ERT")
}

1.6 UMAP Visualization of the Features

1.6.1 The UMAP on Raw Data

  # Cap the visualization at 2000 randomly drawn subjects to keep UMAP fast.
  thesamples <- seq_len(nrow(dataframe))  # seq_len avoids the 1:0 pitfall of 1:nrow()
  if (nrow(dataframe)>2000)
  {
    thesamples <- sample(thesamples,2000)
  }

  # Bin the standardized outcome into integer classes shifted to start at 1,
  # so each bin directly indexes a color in the heat palette.
  classes <- as.integer(scale(dataframe[thesamples,outcome]))
  classes <- classes - min(classes) + 1
  raincolors <- heat.colors(length(unique(classes)))

  # Rank-based logit scaling (FRESA.CAD) of the raw features before UMAP.
  dtatoplot <- as.matrix(FRESAScale(dataframe[thesamples,],method="OrderLogit")$scaledData)
  datasetframe.umap <- umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: RAW",col=raincolors[classes],pch=15)

  # Subject-by-feature heatmap of the scaled raw data.
  gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    margins = c(5,5),  # spelled out: 'mar' relied on partial matching
                    col = heat.colors(5),
                    main = "Raw",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    srtCol = 45,
                    srtRow = 45,
                    key.title = NA,
                    key.xlab = "Z",
                    xlab = "Feature", ylab = "Subject")




  # Project the sampled subjects onto the PCA axes, rescale, and run UMAP.
  # 'pc', 'thesamples', 'raincolors' and 'classes' are set earlier in the report.
  dtatoplot <- as.matrix(FRESAScale(predict(pc,dataframe[thesamples,]),method="OrderLogit")$scaledData)
  datasetframe.umap <- umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: PCA",col=raincolors[classes],pch=15)

  # Subject-by-component heatmap of the PCA-transformed data.
  gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    margins = c(5,5),  # spelled out: 'mar' relied on partial matching
                    col = heat.colors(5),
                    main = "PCA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    srtCol = 45,
                    srtRow = 45,
                    key.title = NA,
                    key.xlab = "Z",
                    xlab = "Feature", ylab = "Subject")


  
# Repeat the UMAP + heatmap view on the EFA factor scores, but only when the
# earlier fa() call succeeded.
if (!inherits(efa,"try-error"))
{
  dtatoplot <- as.matrix(FRESAScale(predict(efa,dataframe[thesamples,toPCA]),method="OrderLogit")$scaledData)
  datasetframe.umap <- umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: EFA",col=raincolors[classes],pch=15)
  # Subject-by-factor heatmap of the EFA scores.
  gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    margins = c(5,5),  # spelled out: 'mar' relied on partial matching
                    col = heat.colors(5),
                    main = "EFA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    srtCol = 45,
                    srtRow = 45,
                    key.title = NA,
                    key.xlab = "Z",
                    xlab = "Feature", ylab = "Subject")
}

  # Project the standardized sampled subjects onto the PLS loading directions
  # (manual matrix product, since plm$loadings is a plain rotation here).
  rotframe <- as.matrix(scale(dataframe[thesamples,rownames(plm$loadings)])) %*% plm$loadings

  dtatoplot <- as.matrix(FRESAScale(rotframe,method="OrderLogit")$scaledData)
  datasetframe.umap <- umap(dtatoplot,n_components=2)

  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: PLS",col=raincolors[classes],pch=15)

  # Subject-by-component heatmap of the PLS-rotated data.
  gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    margins = c(5,5),  # spelled out: 'mar' relied on partial matching
                    col = heat.colors(5),
                    main = "PLS",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    srtCol = 45,
                    srtRow = 45,
                    key.title = NA,
                    key.xlab = "Z",
                    xlab = "Feature", ylab = "Subject")



  # UMAP + heatmap view of the ILAA-decorrelated features (outcome column
  # excluded so the embedding only sees predictors).
  dtatoplot <- as.matrix(FRESAScale(ERTmod[thesamples,colnames(ERTmod) != outcome],method="OrderLogit")$scaledData)
  datasetframe.umap <- umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: ERT",col=raincolors[classes],pch=15)

  # Subject-by-feature heatmap of the decorrelated data.
  gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    margins = c(5,5),  # spelled out: 'mar' relied on partial matching
                    col = heat.colors(5),
                    main = "ERT",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    srtCol = 45,
                    srtRow = 45,
                    key.title = NA,
                    key.xlab = "Z",
                    xlab = "Feature", ylab = "Subject")

1.6.2 Predicted vs. Observed Age



# Predicted vs. observed age on the test set, for the raw model and the
# decorrelated (IDeA) model.
# NOTE(review): 'predOutcome', 'predOutcome_DE' and 'testingset' are created
# elsewhere in the report (presumably the RepeatedLinearCV section) — confirm.
# Age was log10-transformed at load time (BigData$age <- log10(...)), so 10^
# back-transforms both axes to years.
plot(10^predOutcome,10^testingset[,outcome],xlab="Raw Predicted",ylab=outcome,main="Age Prediction")

plot(10^predOutcome_DE,10^testingset[,outcome],xlab="IDeA Predicted",ylab=outcome,main="Age Prediction")